% NOTE(review): this looks like a LaTeX-generated .aux file; LaTeX rewrites it on
% each compile, so lasting fixes belong in the .tex source, not here.
\relax 
% One \citation record per \cite command in the document; BibTeX reads these keys
% to decide which entries go into the bibliography.
\citation{WHO}
\citation{HirshStableAssociationPNAS}
\citation{VariableHostPathogen}
\citation{HostPathogenMaps_BIBM_CAME}
\citation{L2normMoreau}
% A single \cite with two comma-separated keys produces one record with both keys.
\citation{LanckrietStatFrameworkGDF,GenePriorGDFmoreau}
\citation{TCF_MBT_BMC}
\citation{GenomeInteractomeLageMoreau}
% Table-of-contents entry for section 1 (Introduction), page 1.
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}}
% Citation keys recorded after the Introduction heading was written.
\citation{KamerbeekSpoligotyping}
\citation{InsightsIntoBIBM2011,InferredSpolIEEETNB2012}
\citation{ChengChurch}
\citation{DhillonSpectralBiclustering}
\citation{KlugerSpectral}
\citation{SAMBAtanay}
\citation{CTWCgetz}
\citation{BiMax}
\citation{DECOBrecep}
\citation{MadeiraSurvey}
% Table-of-contents entries for section 2 (Background) and its three subsections,
% all on page 2.
\@writefile{toc}{\contentsline {section}{\numberline {2}Background}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Spoligotyping}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Biclustering}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Host-pathogen association analysis: a biclustering problem}{2}}
\citation{CBN}
\citation{KBBN}
\citation{SITVITWEB}
% List-of-figures entry for figure 1 (page 3).
% NOTE(review): the caption's underlined X appears here already expanded to
% \@@underline inside its own $...$, nested in the outer math expression. This
% suggests an unprotected \underline in the caption; it would likely break if a
% list of figures is typeset. Consider \protect or a robust macro in the .tex
% source -- confirm there.
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Host-pathogen tensor (HPT). The first mode represents spoligotypes, the second mode represents countries, and the third mode represents time. This HPT of the form \emph  {Spoligotypes} $\times $ \emph  {Countries} $\times $ \emph  {Time} is denoted as $\mathbf  {\relax $\@@underline {\hbox {X}}\mathsurround \z@ $\relax } \in \mathbb  {R}^{(I=311) \times (J=104) \times (K=7)}$.\relax }}{3}}
% Fallback definition of \caption@xref, used only if the macro is not already
% defined when this file is read (presumably emitted by the caption package).
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
% Label HPT resolves to figure number 1 on page 3.
\newlabel{HPT}{{1}{3}}
% Table-of-contents entries for section 3 (Methods) and subsections 3.1-3.2.
\@writefile{toc}{\contentsline {section}{\numberline {3}Methods}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}The dataset}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Distance matrices}{3}}
\citation{InsightsIntoBIBM2011}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}Genetic proximity matrix}{4}}
% NOTE(review): CDA and P_G both resolve to the subsubsection number 3.2.1. If
% they were meant to point at an equation or algorithm, the \label in the .tex
% source is probably outside a numbered environment -- confirm there.
\newlabel{CDA}{{3.2.1}{4}}
\newlabel{P_G}{{3.2.1}{4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.2}Spatial proximity matrix}{4}}
% NOTE(review): same pattern as above -- CNM and P_S both resolve to 3.2.2.
\newlabel{CNM}{{3.2.2}{4}}
\newlabel{P_S}{{3.2.2}{4}}
% List-of-figures entry for figure 2 (page 5) and its label UBF.
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Unified Biclustering Framework (UBF). In the first step, the data is generated as a matrix, a tensor, a coupled matrix-matrix, or a coupled matrix-tensor. In the second step, the data in various forms are factorized. In the third step, feature pattern similarity matrix is generated using the factor matrices of the decomposition. In the fourth step, we bicluster data points using \texttt  {DensityInvariantBiclustering} algorithm. In the final step, we find the most stable biclusters using average best-match score.\relax }}{5}}
\newlabel{UBF}{{2}{5}}
% Table-of-contents entries for subsection 3.3 (UBF) down to the CMMF_ALS
% subparagraph; the paragraph-level titles carry \textbf inside the entry.
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}UBF: Unified Biclustering Framework}{5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Data generation}{5}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.2}Data factorization}{6}}
\@writefile{toc}{\contentsline {paragraph}{\numberline {3.3.2.1}\textbf  {Coupled matrix-matrix factorization (CMMF)}}{6}}
\@writefile{toc}{\contentsline {subparagraph}{\numberline {3.3.2.1.1}\textbf  {CMMF\_ALS}}{6}}
\newlabel{ApproximationCMMF}{{1}{6}}
% NOTE(review): the label LossFunction is recorded again further down this file
% with number 6, page 9. LaTeX reports the label as multiply defined and the
% later record wins, so \ref{LossFunction} would not resolve to this number 2.
% Give the two \label commands distinct names in the .tex source.
\newlabel{LossFunction}{{2}{6}}
% List-of-tables entry for table 1 (page 7) and its label.
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Data configurations. The mode name S represents spoligotypes, C represents countries, and T represents time in years. The first configuration is a raw $Spoligotypes \times Countries$ matrix, and analyzed using Matrix Biclustering Framework (MBF) as part of UBF. The second data configuration includes time information as the third mode of the tensor, and analyzed using Tensor Biclustering Framework (TBF) as part of UBF. Third, fourth and fifth data configurations are the results of concatenating the genetic proximity matrix, spatial proximity matrix, and both respectively, to the host-pathogen tensor. They are analyzed using Coupled Matrix-Tensor Biclustering Framework (CMTBF) as part of UBF. Finally, in data configuration 6, we exclude time information and analyze the resulting data using coupled matrix-matrix biclustering framework (CMMBF) as part of UBF.\relax }}{7}}
\newlabel{DataConfigurations}{{1}{7}}
\newlabel{CMMF}{{3}{8}}
\newlabel{LagrangianCMMF}{{4}{8}}
% Entry for program 1 (page 8), written to the .lop list file.
% NOTE(review): the entry starts with raw font-size and list-spacing code
% (\fontsize {9}{11}\selectfont ..., including a bare \def \leftmargin ...). This
% looks like a size-switching command expanded inside the caption; it would
% likely misbehave if the .lop list is typeset. Move the size switch out of the
% caption or \protect it in the .tex source -- confirm there.
\@writefile{lop}{\contentsline {program}{\numberline {1}{\ignorespaces \relax \fontsize  {9}{11}\selectfont  \abovedisplayskip 8.5\p@ plus3\p@ minus4\p@ \abovedisplayshortskip \z@ plus2\p@ \belowdisplayshortskip 4\p@ plus2\p@ minus2\p@ \def \leftmargin \leftmargini \parsep 4\p@ plus2\p@ minus\p@ \topsep 8\p@ plus2\p@ minus4\p@ \itemsep 4\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 4\p@ plus2\p@ minus2\p@ \parsep 2\p@ plus\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip \texttt  {CMMF\_ALS($\mathbf  {X}\in \mathbb  {R}^{I \times J}$,$\mathbf  {Y}\in \mathbb  {R}^{I \times M}$,$\mathbf  {Z}\in \mathbb  {R}^{J \times N}$)}\relax }}{8}}
\newlabel{CMMFALS}{{1}{8}}
\citation{CMTF_OPT}
% Table-of-contents entries for the CMTF paragraph and its PARAFAC subparagraph.
\@writefile{toc}{\contentsline {paragraph}{\numberline {3.3.2.2}\textbf  {Coupled matrix-tensor factorization (CMTF)}}{9}}
\@writefile{toc}{\contentsline {subparagraph}{\numberline {3.3.2.2.1}\textbf  {CMTF\_PARAFAC\_ALS}}{9}}
\newlabel{ApproximationPARAFAC}{{5}{9}}
% NOTE(review): second \newlabel for LossFunction; the first one earlier in this
% file has number 2, page 6. LaTeX reports the label as multiply defined and
% this later record overrides the earlier one. Rename one of the two \label
% commands in the .tex source.
\newlabel{LossFunction}{{6}{9}}
\@writefile{toc}{\contentsline {subparagraph}{\numberline {3.3.2.2.2}\textbf  {CMTF\_Tucker\_ALS}}{10}}
% NOTE(review): ApproximationTucker carries number 6, the same number as the
% LossFunction record just above although it is on the next page. Possibly its
% \label sits in an unnumbered display or before the numbered equation --
% confirm in the .tex source.
\newlabel{ApproximationTucker}{{6}{10}}
\newlabel{LossFunctionCMTFTuckerALS}{{7}{10}}
\newlabel{CMTFtuckerALSsteps}{{8}{11}}
% Entry for program 2 (page 12), written to the .lop list file.
% NOTE(review): contains the same expanded font-size code as the program 1 entry,
% plus an expanded \underline (\@@underline inside a nested $...$). Both look
% like fragile commands used unprotected in the caption -- confirm in the .tex.
\@writefile{lop}{\contentsline {program}{\numberline {2}{\ignorespaces \relax \fontsize  {9}{11}\selectfont  \abovedisplayskip 8.5\p@ plus3\p@ minus4\p@ \abovedisplayshortskip \z@ plus2\p@ \belowdisplayshortskip 4\p@ plus2\p@ minus2\p@ \def \leftmargin \leftmargini \parsep 4\p@ plus2\p@ minus\p@ \topsep 8\p@ plus2\p@ minus4\p@ \itemsep 4\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 4\p@ plus2\p@ minus2\p@ \parsep 2\p@ plus\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip \texttt  {CMTF\_Tucker\_ALS($\mathbf  {\relax $\@@underline {\hbox {X}}\mathsurround \z@ $\relax }\in \mathbb  {R}^{I \times J \times K}$,$\mathbf  {Y}\in \mathbb  {R}^{I \times M}$,$\mathbf  {Z}\in \mathbb  {R}^{J \times N}$, [$P,Q,R$])}\relax }}{12}}
\newlabel{CMTFtuckerALS}{{2}{12}}
% Table-of-contents entry for subsubsection 3.3.3 and the label on page 12.
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.3}Feature pattern similarity matrix generation}{12}}
\newlabel{PARAFAC_FPM}{{9}{12}}
% The key BiMax is recorded twice in a row, once per \cite command.
\citation{BiMax}
\citation{BiMax}
\newlabel{Tucker_FPM}{{10}{13}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.4}Density-Invariant Biclustering}{13}}
\citation{DCB_PaoRecep}
\citation{BiMax,MatchScoreLiuWang}
% Entry for program 3 (page 15), written to the .lop list file.
% NOTE(review): same expanded font-size code at the start of the caption as in
% the other two program entries -- see the .tex source for the size switch.
\@writefile{lop}{\contentsline {program}{\numberline {3}{\ignorespaces \relax \fontsize  {9}{11}\selectfont  \abovedisplayskip 8.5\p@ plus3\p@ minus4\p@ \abovedisplayshortskip \z@ plus2\p@ \belowdisplayshortskip 4\p@ plus2\p@ minus2\p@ \def \leftmargin \leftmargini \parsep 4\p@ plus2\p@ minus\p@ \topsep 8\p@ plus2\p@ minus4\p@ \itemsep 4\p@ plus2\p@ minus\p@ {\leftmargin \leftmargini \topsep 4\p@ plus2\p@ minus2\p@ \parsep 2\p@ plus\p@ minus\p@ \itemsep \parsep }\belowdisplayskip \abovedisplayskip \texttt  {Biclusters} = \texttt  {DensityInvariantBiclustering($\mathbf  {X}\in \mathbb  {R}^{I \times J}$, $th$, $\alpha $, $\beta $)}\relax }}{15}}
\newlabel{DensityInvariantBiclustering}{{3}{15}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.5}Statistically significant bicluster selection}{15}}
\citation{W-Beijing-GlobalDissemination,W-Beijing-Review}
% List-of-tables entry for table 2 (page 16) and its label.
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Biclustering results for each data configuration, including \texttt  {DensityInvariantBiclustering} parameters and number of density-invariant biclusters (DIB). For TBF, PARAFAC and Tucker3 model results are listed separately. For CMTBF, CMTF\_PARAFAC\_ALS and CMTF\_Tucker\_ALS results are listed separately. When there are no stable biclusters with average best-match score $\geq 95\%$, five most stable biclusters are picked as the stable biclusters.\relax }}{16}}
\newlabel{BiclusteringResults}{{2}{16}}
% Table-of-contents entries for section 4 (Results) and subsections 4.1-4.5,
% interleaved with the list-of-tables entries and labels for tables 3-7.
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{16}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Biclusters using spoligotypes and country of birth}{16}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Biclustering results on data configuration 1 using UBF. Biclusters associate spoligotypes to country of birth of patients. For spoligotypes, SIT number, major lineage based on CBN, and sublineage based on KBBN are listed. For countries, the name and the TB continent are listed. Biclusters B16 represents the well-known association between patients from Philippines and EAI2-Manila strains.\relax }}{17}}
\newlabel{BiclusteringResults_1}{{3}{17}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Incorporating time}{17}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Biclustering results on data configuration 2 using PARAFAC and Tucker3 models via UBF on the host-pathogen tensor. Bicluster B214 associates patients from India, Peru and Vietnam to 6 Euro-American strains, and the transmissive East Asian Beijing strain ST1. Bicluster B224 groups Mexican patients infected with three different Euro-American strains.\relax }}{18}}
\newlabel{BiclusteringResults_2}{{4}{18}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Incorporating time and distance measures}{18}}
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Biclustering results on data configuration 3,4,5 using CMTF\_PARAFAC\_ALS and CMTF\_Tucker\_ALS models via UBF on the coupled matrix-tensor. Bicluster B411 and B412 suggests that Euro-American strain ST908 and ST904 infects patients from four spatially close countries in Americas respectively. Bicluster B421 suggests that transmissive Beijing strain ST1 is wide-spread and infects patients from three different TB continents. Bicluster B422 groups patients from two neighbour countries, Malaysia and Philippines, who are infected with Beijing strain ST1 and X2 strain ST38.\relax }}{19}}
\newlabel{BiclusteringResults_3_4_5}{{5}{19}}
% Subsections 4.4 and 4.5 both start on page 20; tables 6 and 7 follow on
% pages 21 and 22.
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Incorporating distance, but not time}{20}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Host-pathogen association within each major lineage}{20}}
\@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Biclustering results on data configuration 6 using CMMF\_ALS via UBF on the coupled matrix-matrix. Bicluster B64 groups patients from Bangladesh who are infected with two strains of not-well-defined sublineages: Indo-Oceanic EAI5 strain ST1391 and Euro-American T1 strain ST58.\relax }}{21}}
\newlabel{BiclusteringResults_6}{{6}{21}}
\@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces Biclustering results on data configuration 6 using CMMF\_ALS via UBF on the coupled matrix-matrix for each major lineage. Bicluster B712 suggests that Mexican patients are likely to be infected with UST251, ST478, and ST1154 strains, given that the pathogen is a Euro-American strain. Biclusters B741 groups 222 US patients and shows that US patients are very frequently infected with Beijing strains, including the transmissive ST1 strain. Biclusters B761 and B762 suggest that, given that MTBC is an \emph  {M. bovis} strain, it is more likely to infect a patient from Dominican Republic, if it is a BOV or BOV\_1 strain, and more likely to infect a US patient if it is a BOV\_2 strain.\relax }}{22}}
\newlabel{BiclusteringResults_7}{{7}{22}}
\citation{StatSigBioRelevance}
% Bibliography style (ieeetr) and database (UBFbib) that BibTeX is told to use.
\bibstyle{ieeetr}
\bibdata{UBFbib}
% \bibcite records map each citation key to its printed number, 1 through 29.
\bibcite{WHO}{1}
\bibcite{HirshStableAssociationPNAS}{2}
\bibcite{VariableHostPathogen}{3}
\bibcite{HostPathogenMaps_BIBM_CAME}{4}
\bibcite{L2normMoreau}{5}
% The table-of-contents entry for section 5 (page 23) is recorded in the middle
% of the \bibcite run.
% NOTE(review): presumably a write-timing artifact (toc entries are written at
% page shipout) rather than a source error -- no action expected.
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion and Conclusion}{23}}
\bibcite{LanckrietStatFrameworkGDF}{6}
\bibcite{GenePriorGDFmoreau}{7}
\bibcite{TCF_MBT_BMC}{8}
\bibcite{GenomeInteractomeLageMoreau}{9}
\bibcite{KamerbeekSpoligotyping}{10}
\bibcite{InsightsIntoBIBM2011}{11}
\bibcite{InferredSpolIEEETNB2012}{12}
\bibcite{ChengChurch}{13}
\bibcite{DhillonSpectralBiclustering}{14}
\bibcite{KlugerSpectral}{15}
\bibcite{SAMBAtanay}{16}
\bibcite{CTWCgetz}{17}
\bibcite{BiMax}{18}
\bibcite{DECOBrecep}{19}
\bibcite{MadeiraSurvey}{20}
\bibcite{CBN}{21}
\bibcite{KBBN}{22}
\bibcite{SITVITWEB}{23}
\bibcite{CMTF_OPT}{24}
\bibcite{DCB_PaoRecep}{25}
\bibcite{MatchScoreLiuWang}{26}
\bibcite{W-Beijing-GlobalDissemination}{27}
\bibcite{W-Beijing-Review}{28}
\bibcite{StatSigBioRelevance}{29}
